# Load the full daily GE price history from a local CSV export.
# Columns (per the preview below): Day, Open, High, Low, Close, Volume, OpenInt.
# NOTE(review): path is relative to the notebook's working directory — confirm it exists.
import pandas as pd
initial_df = pd.read_csv('../../data/ge.us.txt')
# Bare expression: renders the DataFrame preview in the notebook.
initial_df
| Day | Open | High | Low | Close | Volume | OpenInt | |
|---|---|---|---|---|---|---|---|
| 0 | 1962-01-02 | 0.6277 | 0.6362 | 0.6201 | 0.6201 | 2575579 | 0 |
| 1 | 1962-01-03 | 0.6201 | 0.6201 | 0.6122 | 0.6201 | 1764749 | 0 |
| 2 | 1962-01-04 | 0.6201 | 0.6201 | 0.6037 | 0.6122 | 2194010 | 0 |
| 3 | 1962-01-05 | 0.6122 | 0.6122 | 0.5798 | 0.5957 | 3255244 | 0 |
| 4 | 1962-01-08 | 0.5957 | 0.5957 | 0.5716 | 0.5957 | 3696430 | 0 |
| ... | ... | ... | ... | ... | ... | ... | ... |
| 14053 | 2017-11-06 | 20.5200 | 20.5300 | 20.0800 | 20.1300 | 60641787 | 0 |
| 14054 | 2017-11-07 | 20.1700 | 20.2500 | 20.1200 | 20.2100 | 41622851 | 0 |
| 14055 | 2017-11-08 | 20.2100 | 20.3200 | 20.0700 | 20.1200 | 39672190 | 0 |
| 14056 | 2017-11-09 | 20.0400 | 20.0710 | 19.8500 | 19.9900 | 50831779 | 0 |
| 14057 | 2017-11-10 | 19.9800 | 20.6800 | 19.9000 | 20.4900 | 100698474 | 0 |
14058 rows × 7 columns
# Restrict the frame to the two series we model: trading day and close price.
initial_df = initial_df.loc[:, ["Day", "Close"]]
initial_df
| Day | Close | |
|---|---|---|
| 0 | 1962-01-02 | 0.6201 |
| 1 | 1962-01-03 | 0.6201 |
| 2 | 1962-01-04 | 0.6122 |
| 3 | 1962-01-05 | 0.5957 |
| 4 | 1962-01-08 | 0.5957 |
| ... | ... | ... |
| 14053 | 2017-11-06 | 20.1300 |
| 14054 | 2017-11-07 | 20.2100 |
| 14055 | 2017-11-08 | 20.1200 |
| 14056 | 2017-11-09 | 19.9900 |
| 14057 | 2017-11-10 | 20.4900 |
14058 rows × 2 columns
import plotly.express as px
# Interactive line chart of the raw (unscaled) close series; plotly renders
# inline in the notebook when the figure is the cell's last expression.
fig = px.line(initial_df, x = "Day", y = "Close", title = "GE Stock")
fig.update_layout(yaxis_title="Close Value")
fig
from sklearn.preprocessing import StandardScaler
import numpy as np
# Variance-stabilise the close prices with a square root, then z-score them.
# Work on a copy so `initial_df` keeps the original prices for plotting later.
scaler = StandardScaler()
df = initial_df.copy()
df[['Close']] = scaler.fit_transform(np.sqrt(df[['Close']]))
# Keep the fitted mean/std so predictions can be mapped back to price space.
center_history = scaler.mean_[0]
scale_history = scaler.scale_[0]
print(center_history)
print(scale_history)
2.677744836440367 1.8340784204156466
# Chronological split of the scaled series: first 12,000 rows train, the next
# 1,000 validate, and rows 13,000-14,052 test (the final few rows are dropped).
ge_trn = df.iloc[0:12000]       # training
ge_val = df.iloc[12000:13000]   # validation
ge_test = df.iloc[13000:14053]  # test
# Window shape: feed `n_inputs` consecutive days to predict the next
# `n_predictions` day(s); the LSTM is built with a fixed batch of `batch_size`.
n_inputs = 10
n_predictions = 1
batch_size = 10
# Transforms the series into overlapping windows of length `timesteps`
# (e.g. 10 inputs + 1 target), yielding a matrix of shape
# (len(data) - timesteps + 1, timesteps).
def build_windowed_matrix(data, timesteps):
    """Stack every length-`timesteps` sliding window of `data` as matrix rows.

    Parameters:
        data: 1-D sequence of values (array-like).
        timesteps: window length (inputs + predictions).

    Returns:
        ndarray of shape (len(data) - timesteps + 1, timesteps); an empty
        (0, timesteps) matrix when the series is shorter than one window.
    """
    data = np.asarray(data)
    # Guard: the original np.vstack raised on an empty window list whenever
    # len(data) < timesteps; return an empty matrix instead.
    if len(data) < timesteps:
        return np.empty((0, timesteps), dtype=data.dtype)
    return np.vstack(
        [data[start : (start + timesteps)]
         for start in range(len(data) - timesteps + 1)])
# Keras expects 3-D input (samples, timesteps, features); this series is
# univariate, so the trailing feature axis has length 1. With n features
# the target shape would be (samples, timesteps, n).
def reshape_3D(df):
    """Append a singleton feature axis: (rows, cols) -> (rows, cols, 1)."""
    target_shape = (df.shape[0], df.shape[1], 1)
    return np.reshape(df, target_shape)
# Extracts the input part of each window (the first n_inputs columns).
def get_x(mtx, n_inputs, batch_size):
    """Slice the model inputs out of a windowed matrix.

    Keeps only the first `n_inputs` columns, drops trailing rows so the
    sample count is a whole multiple of `batch_size` (required by the
    fixed-batch LSTM), and adds a singleton feature axis for Keras.
    """
    inputs = mtx[:, :n_inputs]
    n_full = inputs.shape[0] // batch_size * batch_size
    inputs = inputs[:n_full, :]
    return np.reshape(inputs, (inputs.shape[0], inputs.shape[1], 1))
# Extracts the target part of each window (the column(s) after the inputs).
def get_y(mtx, n_inputs, n_predictions, batch_size):
    """Slice the prediction targets out of a windowed matrix.

    Keeps columns [n_inputs, n_inputs + n_predictions), truncates the rows
    to a whole multiple of `batch_size`, and returns a 3-D array
    (samples, n_predictions, 1) to mirror get_x.
    """
    mtx = mtx[:, n_inputs : (n_inputs + n_predictions)]
    # BUGFIX: truncation was previously gated on
    # `(n_inputs + n_predictions) < batch_size`, which is False for the
    # notebook's own settings (11 < 10), so Y kept more rows than the X
    # produced by get_x (e.g. 1043 vs 1040 on the test split). Always
    # truncate so X and Y stay aligned.
    mtx = mtx[0:(mtx.shape[0] // batch_size * batch_size), :]
    if n_predictions == 1:
        # No-op reshape kept for safety: guarantees a (samples, 1) matrix.
        mtx = np.reshape(mtx, (len(mtx), 1))
    return np.reshape(mtx, (mtx.shape[0], mtx.shape[1], 1))
# Turn each split's scaled Close column into windowed (X, Y) tensors.
window_len = n_inputs + n_predictions
trn_series = ge_trn['Close'].to_numpy().ravel()
val_series = ge_val['Close'].to_numpy().ravel()
test_series = ge_test['Close'].to_numpy().ravel()
# Slide an (inputs + target) window over every split.
trn_mtx = build_windowed_matrix(trn_series, window_len)
val_mtx = build_windowed_matrix(val_series, window_len)
test_mtx = build_windowed_matrix(test_series, window_len)
# Split every window into model inputs (first 10 values) and target (11th).
X_train = get_x(trn_mtx, n_inputs, batch_size)
Y_train = get_y(trn_mtx, n_inputs, n_predictions, batch_size)
X_val = get_x(val_mtx, n_inputs, batch_size)
Y_val = get_y(val_mtx, n_inputs, n_predictions, batch_size)
X_test = get_x(test_mtx, n_inputs, batch_size)
Y_test = get_y(test_mtx, n_inputs, n_predictions, batch_size)
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.callbacks import EarlyStopping
# Single-layer LSTM regressor with a fixed batch of 10 samples, 10 timesteps
# and 1 feature per step, topped by one linear output unit.
# NOTE(review): the feature dimension is set to n_predictions — both equal 1
# here, but presumably this slot means "number of input features"; confirm.
model = keras.Sequential([
    layers.LSTM(units=64, batch_input_shape=(batch_size, n_inputs, n_predictions)),
    layers.Dense(units=1),
])
# log-cosh behaves like MSE near zero but is robust to outliers; plain SGD.
model.compile(
    loss=keras.losses.logcosh,
    optimizer="sgd",
    metrics=["MeanSquaredError"],
)
model.summary()
# Stop training after 10 consecutive epochs without val_loss improvement.
callbacks = [
    EarlyStopping(patience=10),
]
Model: "sequential"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
lstm (LSTM) (10, 64) 16896
dense (Dense) (10, 1) 65
=================================================================
Total params: 16,961
Trainable params: 16,961
Non-trainable params: 0
_________________________________________________________________
# Fit for up to 100 epochs; EarlyStopping (patience=10) halts training once
# validation loss stops improving.
history = model.fit(
    x=X_train,
    y=Y_train,
    validation_data=(X_val, Y_val),
    batch_size=batch_size,
    epochs=100,
    callbacks=callbacks,
)
Epoch 1/100 1199/1199 [==============================] - 8s 5ms/step - loss: 0.0140 - mean_squared_error: 0.0320 - val_loss: 0.0030 - val_mean_squared_error: 0.0060 Epoch 2/100 1199/1199 [==============================] - 6s 5ms/step - loss: 0.0011 - mean_squared_error: 0.0022 - val_loss: 0.0022 - val_mean_squared_error: 0.0044 Epoch 3/100 1199/1199 [==============================] - 6s 5ms/step - loss: 8.1171e-04 - mean_squared_error: 0.0016 - val_loss: 9.5353e-04 - val_mean_squared_error: 0.0019 Epoch 4/100 1199/1199 [==============================] - 6s 5ms/step - loss: 6.8334e-04 - mean_squared_error: 0.0014 - val_loss: 8.8800e-04 - val_mean_squared_error: 0.0018 Epoch 5/100 1199/1199 [==============================] - 6s 5ms/step - loss: 6.0549e-04 - mean_squared_error: 0.0012 - val_loss: 8.2734e-04 - val_mean_squared_error: 0.0017 Epoch 6/100 1199/1199 [==============================] - 5s 5ms/step - loss: 5.5050e-04 - mean_squared_error: 0.0011 - val_loss: 7.5139e-04 - val_mean_squared_error: 0.0015 Epoch 7/100 1199/1199 [==============================] - 6s 5ms/step - loss: 5.1089e-04 - mean_squared_error: 0.0010 - val_loss: 6.4755e-04 - val_mean_squared_error: 0.0013 Epoch 8/100 1199/1199 [==============================] - 6s 5ms/step - loss: 4.7804e-04 - mean_squared_error: 9.5801e-04 - val_loss: 6.0350e-04 - val_mean_squared_error: 0.0012 Epoch 9/100 1199/1199 [==============================] - 5s 5ms/step - loss: 4.5356e-04 - mean_squared_error: 9.0888e-04 - val_loss: 5.8911e-04 - val_mean_squared_error: 0.0012 Epoch 10/100 1199/1199 [==============================] - 6s 5ms/step - loss: 4.3285e-04 - mean_squared_error: 8.6734e-04 - val_loss: 5.0282e-04 - val_mean_squared_error: 0.0010 Epoch 11/100 1199/1199 [==============================] - 6s 5ms/step - loss: 4.1460e-04 - mean_squared_error: 8.3073e-04 - val_loss: 4.1915e-04 - val_mean_squared_error: 8.3872e-04 Epoch 12/100 1199/1199 [==============================] - 6s 5ms/step - loss: 3.9963e-04 - 
mean_squared_error: 8.0069e-04 - val_loss: 5.7115e-04 - val_mean_squared_error: 0.0011 Epoch 13/100 1199/1199 [==============================] - 6s 5ms/step - loss: 3.8656e-04 - mean_squared_error: 7.7449e-04 - val_loss: 4.6898e-04 - val_mean_squared_error: 9.3845e-04 Epoch 14/100 1199/1199 [==============================] - 6s 5ms/step - loss: 3.7532e-04 - mean_squared_error: 7.5195e-04 - val_loss: 4.3978e-04 - val_mean_squared_error: 8.8001e-04 Epoch 15/100 1199/1199 [==============================] - 7s 6ms/step - loss: 3.6518e-04 - mean_squared_error: 7.3161e-04 - val_loss: 5.2003e-04 - val_mean_squared_error: 0.0010 Epoch 16/100 1199/1199 [==============================] - 7s 5ms/step - loss: 3.5610e-04 - mean_squared_error: 7.1341e-04 - val_loss: 3.8942e-04 - val_mean_squared_error: 7.7921e-04 Epoch 17/100 1199/1199 [==============================] - 6s 5ms/step - loss: 3.4812e-04 - mean_squared_error: 6.9741e-04 - val_loss: 4.9455e-04 - val_mean_squared_error: 9.8964e-04 Epoch 18/100 1199/1199 [==============================] - 6s 5ms/step - loss: 3.4025e-04 - mean_squared_error: 6.8162e-04 - val_loss: 3.7894e-04 - val_mean_squared_error: 7.5823e-04 Epoch 19/100 1199/1199 [==============================] - 6s 5ms/step - loss: 3.3395e-04 - mean_squared_error: 6.6900e-04 - val_loss: 3.7507e-04 - val_mean_squared_error: 7.5050e-04 Epoch 20/100 1199/1199 [==============================] - 6s 5ms/step - loss: 3.2855e-04 - mean_squared_error: 6.5818e-04 - val_loss: 3.8867e-04 - val_mean_squared_error: 7.7772e-04 Epoch 21/100 1199/1199 [==============================] - 6s 5ms/step - loss: 3.2289e-04 - mean_squared_error: 6.4683e-04 - val_loss: 4.2802e-04 - val_mean_squared_error: 8.5647e-04 Epoch 22/100 1199/1199 [==============================] - 6s 5ms/step - loss: 3.1773e-04 - mean_squared_error: 6.3648e-04 - val_loss: 4.1484e-04 - val_mean_squared_error: 8.3010e-04 Epoch 23/100 1199/1199 [==============================] - 6s 5ms/step - loss: 3.1265e-04 - 
mean_squared_error: 6.2631e-04 - val_loss: 3.4836e-04 - val_mean_squared_error: 6.9703e-04 Epoch 24/100 1199/1199 [==============================] - 6s 5ms/step - loss: 3.0905e-04 - mean_squared_error: 6.1908e-04 - val_loss: 4.0774e-04 - val_mean_squared_error: 8.1587e-04 Epoch 25/100 1199/1199 [==============================] - 6s 5ms/step - loss: 3.0492e-04 - mean_squared_error: 6.1082e-04 - val_loss: 3.7052e-04 - val_mean_squared_error: 7.4140e-04 Epoch 26/100 1199/1199 [==============================] - 6s 5ms/step - loss: 3.0046e-04 - mean_squared_error: 6.0184e-04 - val_loss: 3.6940e-04 - val_mean_squared_error: 7.3915e-04 Epoch 27/100 1199/1199 [==============================] - 6s 5ms/step - loss: 2.9738e-04 - mean_squared_error: 5.9569e-04 - val_loss: 3.8177e-04 - val_mean_squared_error: 7.6391e-04 Epoch 28/100 1199/1199 [==============================] - 6s 5ms/step - loss: 2.9372e-04 - mean_squared_error: 5.8835e-04 - val_loss: 3.5979e-04 - val_mean_squared_error: 7.1991e-04 Epoch 29/100 1199/1199 [==============================] - 6s 5ms/step - loss: 2.9065e-04 - mean_squared_error: 5.8220e-04 - val_loss: 3.7945e-04 - val_mean_squared_error: 7.5926e-04 Epoch 30/100 1199/1199 [==============================] - 6s 5ms/step - loss: 2.8758e-04 - mean_squared_error: 5.7605e-04 - val_loss: 3.8973e-04 - val_mean_squared_error: 7.7983e-04 Epoch 31/100 1199/1199 [==============================] - 6s 5ms/step - loss: 2.8526e-04 - mean_squared_error: 5.7140e-04 - val_loss: 3.4501e-04 - val_mean_squared_error: 6.9032e-04 Epoch 32/100 1199/1199 [==============================] - 6s 5ms/step - loss: 2.8166e-04 - mean_squared_error: 5.6418e-04 - val_loss: 3.2166e-04 - val_mean_squared_error: 6.4360e-04 Epoch 33/100 1199/1199 [==============================] - 6s 5ms/step - loss: 2.7964e-04 - mean_squared_error: 5.6015e-04 - val_loss: 3.5519e-04 - val_mean_squared_error: 7.1069e-04 Epoch 34/100 1199/1199 [==============================] - 6s 5ms/step - loss: 2.7709e-04 
- mean_squared_error: 5.5502e-04 - val_loss: 3.7536e-04 - val_mean_squared_error: 7.5108e-04 Epoch 35/100 1199/1199 [==============================] - 6s 5ms/step - loss: 2.7478e-04 - mean_squared_error: 5.5040e-04 - val_loss: 3.5360e-04 - val_mean_squared_error: 7.0752e-04 Epoch 36/100 1199/1199 [==============================] - 6s 5ms/step - loss: 2.7267e-04 - mean_squared_error: 5.4616e-04 - val_loss: 3.5248e-04 - val_mean_squared_error: 7.0528e-04 Epoch 37/100 1199/1199 [==============================] - 6s 5ms/step - loss: 2.7023e-04 - mean_squared_error: 5.4128e-04 - val_loss: 3.8327e-04 - val_mean_squared_error: 7.6690e-04 Epoch 38/100 1199/1199 [==============================] - 7s 6ms/step - loss: 2.6846e-04 - mean_squared_error: 5.3775e-04 - val_loss: 3.1528e-04 - val_mean_squared_error: 6.3082e-04 Epoch 39/100 1199/1199 [==============================] - 6s 5ms/step - loss: 2.6640e-04 - mean_squared_error: 5.3359e-04 - val_loss: 3.1396e-04 - val_mean_squared_error: 6.2818e-04 Epoch 40/100 1199/1199 [==============================] - 6s 5ms/step - loss: 2.6495e-04 - mean_squared_error: 5.3071e-04 - val_loss: 3.2290e-04 - val_mean_squared_error: 6.4609e-04 Epoch 41/100 1199/1199 [==============================] - 6s 5ms/step - loss: 2.6245e-04 - mean_squared_error: 5.2566e-04 - val_loss: 3.1376e-04 - val_mean_squared_error: 6.2779e-04 Epoch 42/100 1199/1199 [==============================] - 6s 5ms/step - loss: 2.6123e-04 - mean_squared_error: 5.2325e-04 - val_loss: 3.3937e-04 - val_mean_squared_error: 6.7904e-04 Epoch 43/100 1199/1199 [==============================] - 6s 5ms/step - loss: 2.5928e-04 - mean_squared_error: 5.1934e-04 - val_loss: 2.8976e-04 - val_mean_squared_error: 5.7976e-04 Epoch 44/100 1199/1199 [==============================] - 6s 5ms/step - loss: 2.5798e-04 - mean_squared_error: 5.1672e-04 - val_loss: 3.0530e-04 - val_mean_squared_error: 6.1085e-04 Epoch 45/100 1199/1199 [==============================] - 6s 5ms/step - loss: 
2.5598e-04 - mean_squared_error: 5.1272e-04 - val_loss: 3.0278e-04 - val_mean_squared_error: 6.0582e-04 Epoch 46/100 1199/1199 [==============================] - 6s 5ms/step - loss: 2.5508e-04 - mean_squared_error: 5.1092e-04 - val_loss: 2.9635e-04 - val_mean_squared_error: 5.9295e-04 Epoch 47/100 1199/1199 [==============================] - 6s 5ms/step - loss: 2.5291e-04 - mean_squared_error: 5.0656e-04 - val_loss: 2.9854e-04 - val_mean_squared_error: 5.9732e-04 Epoch 48/100 1199/1199 [==============================] - 6s 5ms/step - loss: 2.5185e-04 - mean_squared_error: 5.0445e-04 - val_loss: 3.4860e-04 - val_mean_squared_error: 6.9751e-04 Epoch 49/100 1199/1199 [==============================] - 6s 5ms/step - loss: 2.5085e-04 - mean_squared_error: 5.0244e-04 - val_loss: 3.1082e-04 - val_mean_squared_error: 6.2190e-04 Epoch 50/100 1199/1199 [==============================] - 6s 5ms/step - loss: 2.4887e-04 - mean_squared_error: 4.9847e-04 - val_loss: 2.9364e-04 - val_mean_squared_error: 5.8751e-04 Epoch 51/100 1199/1199 [==============================] - 6s 5ms/step - loss: 2.4780e-04 - mean_squared_error: 4.9633e-04 - val_loss: 2.9008e-04 - val_mean_squared_error: 5.8039e-04 Epoch 52/100 1199/1199 [==============================] - 6s 5ms/step - loss: 2.4674e-04 - mean_squared_error: 4.9421e-04 - val_loss: 3.1582e-04 - val_mean_squared_error: 6.3190e-04 Epoch 53/100 1199/1199 [==============================] - 6s 5ms/step - loss: 2.4565e-04 - mean_squared_error: 4.9202e-04 - val_loss: 3.2495e-04 - val_mean_squared_error: 6.5018e-04
# Predict on the test windows; predict must use the same fixed batch size the
# LSTM was built with (batch_input_shape).
pred_test = model.predict(X_test, batch_size = batch_size)
# Invert the preprocessing in reverse order: undo the StandardScaler
# (x * std + mean), then undo the square-root transform by squaring.
rescaled = pred_test * scale_history + center_history
pred_test = np.square(rescaled)
pred_test
104/104 [==============================] - 1s 2ms/step
array([[20.93589 ],
[21.09217 ],
[21.305178],
...,
[22.345613],
[21.961597],
[21.569155]], dtype=float32)
# Align predictions with the actual prices: the first prediction targets row
# 13000 + n_inputs (13000 is where the test split starts; the first n_inputs
# days of the split are consumed as model input, not predicted).
# NOTE(review): 13000 hard-codes the test-split start from the slicing cell.
start = 13000 + n_inputs
final_df = initial_df[start : (start + pred_test.shape[0])].copy()
final_df["predictions"] = pred_test
final_df
| Day | Close | predictions | |
|---|---|---|---|
| 13010 | 2013-09-17 | 21.222 | 20.935890 |
| 13011 | 2013-09-18 | 21.580 | 21.092171 |
| 13012 | 2013-09-19 | 21.397 | 21.305178 |
| 13013 | 2013-09-20 | 21.002 | 21.412287 |
| 13014 | 2013-09-23 | 21.240 | 21.381840 |
| ... | ... | ... | ... |
| 14045 | 2017-10-25 | 21.500 | 23.026583 |
| 14046 | 2017-10-26 | 21.320 | 22.664095 |
| 14047 | 2017-10-27 | 20.790 | 22.345613 |
| 14048 | 2017-10-30 | 20.410 | 21.961597 |
| 14049 | 2017-10-31 | 20.160 | 21.569155 |
1040 rows × 3 columns
# Overlay actual close prices and model predictions on one interactive chart.
fig = px.line(final_df, x = "Day", y = ["Close", "predictions"], title = "GE Stock")
fig.update_layout(yaxis_title="Close Value")
fig